package com.wuji1626.spark.rdd.transform

import org.apache.spark.{SparkConf, SparkContext}

import java.text.SimpleDateFormat
import java.util.Locale

/**
 * Spark `filter` example: prints every line of `datas/apache.log` whose fourth
 * space-separated field starts with "17/May/2023".
 */
object RDD_Transform_filter_LogFilter {

  /**
   * Runs the job on a local Spark context and prints the matching lines.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Step 1: set up the environment
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
    val sc = new SparkContext(sparkConf)

    try {
      // Step 2: apply the filter operator
      val rdd = sc.textFile("datas/apache.log")
      val matchingLines = rdd.filter { line =>
        val data = line.split(" ")
        // Lines with fewer than four fields (e.g. blank lines) are skipped
        // instead of failing the whole job with an index-out-of-bounds error.
        data.length > 3 && data(3).startsWith("17/May/2023")
      }

      // collect() is the action that triggers the job; the results are printed
      // from the returned array.
      matchingLines.collect().foreach(println)
    } finally {
      // Step 3: shut down the environment, even if the job above failed
      sc.stop()
    }
  }

}
